# Merge the load dataset with time (calendar) factors and weather factors
# Step 3: data merging
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# --- Calendar features -------------------------------------------------------
# Forward slashes are used so the relative path resolves on both Windows and
# POSIX systems (a backslash is only a separator on Windows).
df = pd.read_csv('data/Area1_Load_clean.csv')
# Keep only the date key (YMD) and the calendar attributes:
# weekday, is_workday, Holiday_Info, Holiday_Name.
df = df[['YMD', 'weekday', 'is_workday', 'Holiday_Info', 'Holiday_Name']]
# YMD is compared as text here, which is only correct when it is stored as
# 'YYYY-MM-DD' -- the same layout the merge key below is formatted to.
df = df[(df['YMD'] >= '2012-01-01') & (df['YMD'] <= '2015-01-11')]
# NOTE(review): the merge below assumes one calendar row per YMD; duplicated
# dates in this table would multiply the hourly rows -- confirm against the data.

# --- Hourly load -------------------------------------------------------------
df1 = pd.read_csv('data/Area1_Load_hour.csv')
# Parse the timestamps first and reduce them to their calendar day. Filtering
# on the parsed day (instead of on the raw text) keeps every hour of the final
# day inside the inclusive range and does not depend on how the timestamp is
# written in the CSV. Doing the assignment on the freshly loaded frame also
# avoids pandas' SettingWithCopyWarning.
# NOTE(review): this overwrites Time with the date only, so the hour of day is
# kept only if the file carries it in another column -- confirm.
df1['Time'] = pd.to_datetime(df1['Time']).dt.strftime('%Y-%m-%d')
df1 = df1[(df1['Time'] >= '2012-01-01') & (df1['Time'] <= '2015-01-11')]

# Attach the calendar attributes to every hourly row by date. A left join keeps
# all hourly rows even when a date has no calendar record.
df = pd.merge(df1, df, left_on='Time', right_on='YMD', how='left')

# --- Weather features --------------------------------------------------------
# Forward slashes keep the relative path valid on both Windows and POSIX.
df2 = pd.read_csv('data/Area1_Weather_clean.csv')
# .copy() turns the filtered selection into an independent frame, so the column
# assignments below do not raise SettingWithCopyWarning.
df2 = df2[(df2['YMD'] >= '2012-01-01') & (df2['YMD'] <= '2015-01-11')].copy()
df2['YMD'] = pd.to_datetime(df2['YMD'])
# Build the same 'YYYY-MM-DD' text key that the load table uses in its Time column.
df2['Time'] = df2['YMD'].dt.strftime('%Y-%m-%d')
# Left join on the shared date key. Both frames carry a YMD column, so pandas
# suffixes them as YMD_x / YMD_y in the result.
df = pd.merge(df, df2, on='Time', how='left')

# Drop the columns that are no longer needed: the two YMD copies produced by
# the merges (YMD_x, YMD_y) and Holiday_Name.
df = df.drop(columns=['YMD_x', 'YMD_y', 'Holiday_Name'])

# Write the merged table without the index column. Forward slashes keep the
# relative path valid on both Windows and POSIX.
df.to_csv('data/Area1_Load_Weather_Time.csv', index=False)